# -*- coding: utf-8 -*-
import random
from zc_core.spiders.base import BaseSpider
import scrapy
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from zc_core.dao.sku_dao import SkuDao
from zc_core.dao.sku_pool_dao import SkuPoolDao
from zc_core.model.items import Box
from zc_core.util.done_filter_rds import DoneFilterRDS
from zc_core.util.http_util import retry_request
from zc_core.dao.batch_dao import BatchDao
from cmcc.rules import *
from zc_core.util.order_deadline_filter import OrderItemFilter


class Full2Spider(BaseSpider):
    """Full crawl of goods-detail pages on b2bjoy.10086.cn.

    For every SKU in the SKU pool that is still eligible (not offline for too
    long and not already collected in this batch), the spider requests the
    goods-detail page through a fixed proxy with fixed session cookies and
    emits the parsed item plus any order rows found on that page.

    Relevant project settings:

    * ``MAX_OFFLINE_TIME`` (default ``1``): SKUs whose ``offlineTime`` is
      greater than this value are skipped.
    * ``FORCE_RECOVER`` (default ``False``): when truthy, SKUs already marked
      as done for this batch are requested again.
    """

    name = 'full2'
    custom_settings = {
        'CONCURRENT_REQUESTS': 12,
        'CONCURRENT_REQUESTS_PER_DOMAIN': 12,
        'CONCURRENT_REQUESTS_PER_IP': 12,
    }
    # Goods-detail page; the placeholder receives the SKU id.
    item_url = 'http://b2bjoy.10086.cn/oscp/goods/product/goodsDetail.html?sn={}'
    # Browser-like headers sent with every goods-detail request. They never
    # change between requests, so they are defined once instead of per SKU.
    item_headers = {
        'Host': 'b2bjoy.10086.cn',
        'Proxy-Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Referer': 'http://b2bjoy.10086.cn/oscp/home/main.html',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,ja;q=0.7',
    }

    def __init__(self, batchNo=None, *args, **kwargs):
        """Set up batch bookkeeping, filters and the session used for requests.

        :param batchNo: batch number forwarded to the parent initializer.
            ``self.batch_no`` is read right after that call, so it is
            presumably assigned by ``BaseSpider`` -- confirm there.
        """
        super(Full2Spider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Create the batch record.
        BatchDao().create_batch(self.batch_no)
        # Avoid collecting the same SKU twice within a batch.
        self.done_filter = DoneFilterRDS(self.batch_no)
        # Order collection deadline filter.
        self.order_filter = OrderItemFilter()
        # Number of orders per page.
        self.order_page_size = 10

        # NOTE(review): the proxy and the session cookies are hard-coded and
        # expire; they have to be replaced by hand for every run. They should
        # be supplied through settings or spider arguments instead of living
        # in source control.
        self.proxy = 'http://106.110.91.137:4243'
        self.cookies = {'JSESSIONID': '4DA6955274BA12B4E599D9D869AB6398',
                        'centralSessionId': '_userId-c1db073ffef143c2822e8dfb024c2192_a5d6169c4e794b70ac45307c43bb7a7a'}

    def _select_targets(self, pool_list, max_offline_time, force_recover):
        """Return the SKUs from ``pool_list`` that should be requested.

        A SKU is dropped when its ``offlineTime`` exceeds ``max_offline_time``
        or when it is already marked as done and ``force_recover`` is falsy.
        """
        targets = []
        for sku in pool_list:
            sku_id = sku.get('_id')
            # A missing or null offlineTime is treated as 0.
            offline_time = sku.get('offlineTime') or 0
            if offline_time > max_offline_time:
                self.logger.debug('忽略: [%s][%s]', sku_id, offline_time)
                continue
            # Avoid duplicate collection; the done-filter is only consulted
            # when a forced re-crawl was not requested.
            if not force_recover and self.done_filter.contains(sku_id):
                self.logger.debug('---->已采: %s', sku_id)
                continue
            targets.append(sku)
        return targets

    def start_requests(self):
        """Yield one goods-detail request per eligible SKU, in random order.

        Yields nothing (after logging an error) when no session cookies are
        configured.
        """
        settings = get_project_settings()
        # One default for the whole method. The stricter of the two defaults
        # that used to be applied in sequence is kept, so the set of requested
        # SKUs is unchanged when the setting is absent.
        max_offline_time = settings.get('MAX_OFFLINE_TIME', 1)
        force_recover = settings.get('FORCE_RECOVER', False)

        if not self.cookies:
            self.logger.error('init cookie failed...')
            return
        # NOTE(review): this writes the live session cookies to the log.
        self.logger.info('init cookie: %s', self.cookies)

        pool_list = SkuPoolDao().get_sku_pool_list() or []
        self.logger.info('全量: %s', len(pool_list))
        dist_list = self._select_targets(pool_list, max_offline_time, force_recover)
        self.logger.info('目标: %s', len(dist_list))
        random.shuffle(dist_list)

        for sku in dist_list:
            sku_id = sku.get('_id')
            yield Request(
                url=self.item_url.format(sku_id),
                cookies=self.cookies,
                callback=self.parse_item_data,
                errback=self.error_back,
                meta={
                    'proxy': self.proxy,
                    'reqType': 'item',
                    'batchNo': self.batch_no,
                    'orderPageSize': self.order_page_size,
                    'pageNo': 1,
                    'skuId': sku_id,
                },
                headers=self.item_headers,
                priority=250,
                dont_filter=True
            )

    # Handle the goods-detail response.
    def parse_item_data(self, response):
        """Emit the item and the order rows parsed from a goods-detail page.

        The module-level ``parse_item_data`` and ``parse_order_item`` helpers
        do the actual parsing (presumably provided by the ``cmcc.rules`` star
        import -- confirm). The SKU is marked as done only when an item was
        parsed; a falsy parse result is logged as delisted.
        """
        meta = response.meta
        sku_id = meta.get('skuId')
        curr_page = meta.get('pageNo')

        # Item.
        data = parse_item_data(response)
        if data:
            self.logger.info('商品: [%s]', data.get('skuId'))
            self.done_filter.put(sku_id)
            yield data
        else:
            self.logger.info('下架: [%s]', sku_id)

        # Orders. The second element of the result (a next-page flag) is not
        # used here; only the rows found on this page are emitted.
        order_list, _ = parse_order_item(response)
        if order_list:
            self.logger.info('订单: sku=%s, page=%s, cnt=%s', sku_id, curr_page, len(order_list))
            yield Box('order_item', self.batch_no, order_list)
        else:
            self.logger.info('无单: sku=%s, page=%s', sku_id, curr_page)
